# -*- coding: utf-8 -*-
'''中国新闻网滚动新闻爬虫'''
import scrapy
import logging

class ChinanewsSpiderSpider(scrapy.Spider):
    """Spider for the rolling-news listing pages of chinanews.com.

    The crawl has two levels: listing pages are requested in
    ``start_requests``, ``parse`` follows every article link found on a
    listing page, and ``parse_news`` extracts the article text.
    """
    name = 'chinanews_spider'
    # Number of rolling-news listing pages to crawl (pages 1..number).
    # May be overridden as a spider argument, e.g. ``-a number=5``.
    number = 10

    def start_requests(self):
        """Yield one request for each rolling-news listing page.

        Raises:
            ValueError: if ``number`` cannot be interpreted as an integer.
        """
        # Spider arguments supplied with ``-a`` are set on the instance as
        # strings, so coerce before handing the value to ``range``.
        page_count = int(self.number)
        for page in range(1, page_count + 1):
            url = 'http://www.chinanews.com/scroll-news/news{0}.html'.format(page)
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Follow every article link found on a listing page.

        Links are taken from the ``href`` attribute of ``<a>`` elements
        inside ``div.dd_bt``; each one is scheduled with ``parse_news`` as
        its callback.
        """
        for href in response.css('div.dd_bt a::attr(href)').getall():
            yield response.follow(href, callback=self.parse_news)

    def parse_news(self, response):
        """Extract the body text of a single news article.

        The text nodes of the ``<p>`` elements inside ``div.left_zw`` are
        concatenated without a separator and yielded as a one-key item,
        ``{'article': <text>}``.
        """
        paragraphs = response.css('div.left_zw p::text').getall()
        yield {'article': ''.join(paragraphs)}
